#setwd("C:/Users/deram/OneDrive - IAEA/My Documents/Spiro_Wigg_paper/ready")
setwd("C:/Users/deram/OneDrive - IAEA/Desktop/Dera/Dera 2/manuscript Spiroplasma in Gt/manuscipt_02032022/analyse")

#Call libraries
library(ggplot2)
library(lattice)
library(gcookbook)
library(ggfortify)
library(datasets)
library(MASS)
library(knitr)
library(coxme)
library(lme4)
library(nlme)
library(tidyverse)
library(gapminder)
library(tufte)
library(dplyr)
library(ggpubr)
library(tufte)
library(datasets)
library(MASS)
library(rmarkdown)
library(tufte)
library(ggthemes)
library(AICcmodavg)
library(car)

## Prevalence of Spiroplasma

spi2 <- read.csv("data_Tryp_Spiro.csv")
str(spi2)
attach(spi2)
head(spi2)
spi=na.omit(spi2)
summary(spi2)

#transform variables to factor
spi2 <- transform(spi2, Country = factor(Country),
                  Location = factor(Location),
                  Sex = factor(Sex))
### test of normality
shapiro.test(spi2$Prev_Spiro) #p>0.05 so data are normalized distributed

###Best model for the evaluation of Spiroplasma prevalence
model1 <- glm(Prev_Spiro ~ Country, data = spi2, family = gaussian())
model2 <- glm(Prev_Spiro ~ Location, data = spi2, family = gaussian())
model3 <- glm(Prev_Spiro ~ Sex, data = spi2, family = gaussian())
model4 <- glm(Prev_Spiro ~ Country*Location, data = spi2, family = gaussian())
model5 <- glm(Prev_Spiro ~ Country+Location, data = spi2, family = gaussian())
model6 <- glm(Prev_Spiro ~ Country*Sex, data = spi2, family = gaussian())
model7 <- glm(Prev_Spiro ~ Country+Sex, data = spi2, family = gaussian())
model8 <- glm(Prev_Spiro ~ Location*Sex, data = spi2, family = gaussian())
model9 <- glm(Prev_Spiro ~ Location+Sex, data = spi2, family = gaussian())
model10 <- glm(Prev_Spiro ~ Country+Location+Sex, data = spi2, family = gaussian())
model11 <- glm(Prev_Spiro ~ Country*Location*Sex, data = spi2, family = gaussian())
model12 <- glm(Prev_Spiro ~ Prev_Tspp, data = spi2, family = gaussian())
model13 <- glm(Prev_Spiro ~ Country + Prev_Tspp, data = spi2, family = gaussian())
model14 <- glm(Prev_Spiro ~ Country*Prev_Tspp, data = spi2, family = gaussian())
model15 <- glm(Prev_Spiro ~ Location + Prev_Tspp, data = spi2, family = gaussian())

library(MuMIn)
AICc(model1,model2, model3, model4,model5, model6, model7, model8, model9, model10, model11, model12, model13, model14, model15) #mod is the best model
summary(model11)
summary(model3)

###Check for model overdispersion with Bolker's function
overdisp_fun <- function(model11) {
  rdf <- df.residual(model11)
  rp <- residuals(model11,type="pearson")
  Pearson.chisq <- sum(rp^2)
  prat <- Pearson.chisq/rdf
  pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
  c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
  }

overdisp_fun(model11)

#difference by country
summary(model1)
anova(model1)
Anova(model1)
#difference between the locations
summary(model2)
anova(model2)
Anova(model2)
#difference between the sex
summary(model3)
anova(model3)
Anova(model3)
 
# only location is significant, so we will present the plot according to the location

spi2$Location <- relevel(spi2$Location, ref= "Mortani")
model1<-lm(Prev_Spiro ~ Location, data = spi2, family = gaussian())
summary(model1)

spi2$Location <- relevel(spi2$Location, ref= "Folonzo")
model1<-glm(Prev_Spiro ~ Location, data = spi2, family = gaussian())
summary(model1)

spi2$Location <- relevel(spi2$Location, ref= "Cirdes")
model1<-glm(Prev_Spiro ~ Location, data = spi2, family = gaussian())
summary(model1)
#it"s only on Mortani that there is a significant difference

###plot of the prevalence of Spiroplasma according to the location
spi.tiff4<-ggplot(spi2,aes(x=Location ,y=Prev_Spiro, fill = Country)) +
  geom_boxplot() + geom_jitter(width=0.1,alpha=0.2)+ ylim(0, 100)
spi.tiff4


tiff("spi.tiff4", width = 7, height = 4, units = 'in', res = 300)
plot(spi.tiff4+theme_tufte() + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bold("Location"))) + ylab( expression (paste (bold("Prevalence of "), bolditalic(" Spiroplasma (%)"), )))
dev.off()

##Significant difference within the location in each country
#for Ghana
spi2_Gh <- subset(spi2,Country=="Ghana")
spi2_Gh

spi2_Gh$Location<- as.factor(spi2_Gh$Location)
spi2_Gh$Location <-relevel(spi2_Gh$Location, ref = "Mortani" )
model1<-glm(Prev_Spiro ~ Location, data = spi2_Gh, family = gaussian())
summary(model1)
anova(model1)
Anova(model1)

#For Burkina Faso
spi2_BKF <- subset(spi2,Country=="Burkina Faso")
spi2_BKF

spi2_BKF$Location<- as.factor(spi2_BKF$Location)
spi2_Gh$Location <-relevel(spi2_Gh$Location, ref = "Comoe" )
model1<-glm(Prev_Spiro ~ Location, data = spi2_BKF, family = gaussian())
summary(model1)
anova(model1)
Anova(model1)

## Analysis of Trypanosoma infection

##For Tspp regardless the country
model1<-glm(Prev_Tspp ~ Country, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_Tspp ~ Location, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_Tspp ~ Sex, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

#for Tspp in Burkina Faso
spi2_BKF <- subset(spi2,Country=="Burkina Faso")
spi2_BKF

spi2_BKF$Location<- as.factor(spi2_BKF$Location)
spi2_BKF$Location <-relevel(spi2_BKF$Location, ref = "Comoe" )
model1<-glm(Prev_Tspp ~ Location, data = spi2_BKF, family = gaussian())
summary(model1)
anova(model1)
Anova(model1)

#For Tspp in Ghana
spi2_Gh <- subset(spi2,Country=="Ghana")
spi2_Gh

spi2_Gh$Location<- as.factor(spi2_Gh$Location)
spi2_Gh$Location <-relevel(spi2_Gh$Location, ref = "Mortani" )
model1<-glm(Prev_Tspp ~ Location, data = spi2_Gh, family = gaussian())
summary(model1)
anova(model1)
Anova(model1)

#For Tc
model1<-glm(Prev_Tc ~ Country, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_Tc ~ Location, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_Tc ~ Sex, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

#Tv
model1<-glm(Prev_Tv ~ Country, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_Tv ~ Location, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_Tv ~ Sex, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

#Tz
model1<-glm(Prev_Tz ~ Country, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_Tz ~ Location, data = spi3)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_Tz ~ Sex, data = spi3)
summary(model1)
anova(model1)
Anova(model1)

#TcTv
model1<-glm(Prev_TcTv ~ Country, data = spi2)
summary(model1)
anova(model1)
Anova(model1) #p<0.05

model1<-glm(Prev_TcTv ~ Location, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_TcTv ~ Sex, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

#TcTz
model1<-glm(Prev_TcTz ~ Country, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_TcTz ~ Location, data = spi2)
summary(model1)
anova(model1)
Anova(model1) #p<0.05

model1<-glm(Prev_TcTz ~ Sex, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

#TvTz
model1<-glm(Prev_TvTz ~ Country, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_TvTz ~ Location, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_TvTz ~ Sex, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

#TcTvTz
model1<-glm(Prev_TcTvTz ~ Country, data = spi2)
summary(model1)
anova(model1)
Anova(model1)

model1<-glm(Prev_TcTvTz ~ Location, data = spi2)
summary(model1)
anova(model1)
Anova(model1) #p<0.05

model1<-glm(Prev_TcTvTz ~ Sex, data = spi2)
summary(model1)
anova(model1)
Anova(model1)


##qPCR data analysis
spi <- read.csv("data_qPCR_Gt_Spiro_Tryp_Wig.csv")
str(spi)
attach(spi)
head(spi)
spi=na.omit(spi)
summary(spi)

# transform into numeric and factor
spi$Normalized_Tryp=as.numeric(spi$Normalized_Tryp)
spi$Normalized_Wig =as.numeric(spi$Normalized_Wig)
spi$Normalized_Spiro =as.numeric(spi$Normalized_Spiro)
spi$Infection_type =as.factor(spi$Infection_type)

str(spi)

########## Spi
spi.tiff1<-ggplot(spi,aes(x=Infection_type ,y=Normalized_Spiro, fill = Infection_type)) +
  geom_boxplot() + geom_jitter(width=0.1,alpha=0.2)+ ylim(0, 10)
spi.tiff1

model1<-glm(Normalized_Spiro ~ Infection_type, data = spi)
summary(model1)
anova(model1)
Anova(model1)

tiff("spi.tiff1", width = 4, height = 4, units = 'in', res = 300)
plot(spi.tiff1+theme_tufte() + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bolditalic("infection type"))) + ylab( expression (paste (bold("Normalized density of "), bolditalic(" Spiroplasma"), )))
dev.off()


########## Wig

spi.tiff2<-ggplot(spi,aes(x=Infection_type ,y=Normalized_Wig, fill = Infection_type)) +
  geom_boxplot() + geom_jitter(width=0.1,alpha=0.2)+ ylim(0, 8)
spi.tiff2

model1<-glm(Normalized_Wig ~ Infection_type, data = spi)
summary(model1)
anova(model1)
Anova(model1)

tiff("spi.tiff2", width = 4, height = 4, units = 'in', res = 300)
plot(spi.tiff2+theme_tufte() + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bolditalic("infection type"))) + ylab( expression (paste (bold("Normalized density of "), bolditalic(" Wigglesworthia"), )))
dev.off()

######## tryp


spi.tiff3<-ggplot(spi,aes(x=Infection_type ,y=Normalized_Tryp, fill = Infection_type)) +
  geom_boxplot() + geom_jitter(width=0.1,alpha=0.2)+ ylim(0, 6)
spi.tiff3

model1<-glm(Normalized_Tryp ~ Infection_type, data = spi)
summary(model1)
anova(model1)
Anova(model1)

tiff("spi.tiff3", width = 4, height = 4, units = 'in', res = 300)
plot(spi.tiff3+theme_tufte() + theme(axis.line = element_line(size = 1, colour = "black"))) + xlab(expression(bolditalic("infection type"))) + ylab( expression (paste (bold("Normalized density of "), bolditalic(" Trypanosoma"), )))
dev.off()

######### Correlations fig

reg1a<-ggplot(spi, aes(Normalized_Wig, Normalized_Spiro, col = Infection_type, fill = Infection_type)) + 
  geom_point(size = 3, shape = 21, col = "black") +
  geom_vline(xintercept = 2.1, color = "black", size=1)+
  xlab(expression(italic("Wigglesworthis")))
reg1a

tiff("reg1a.tiff", width = 4, height = 4, units = 'in', res = 300)
plot(reg1a+theme_tufte() + theme(axis.line = element_line(size = 1, colour = "black"))+ theme(legend.position = c(.95, .95),legend.justification = c("right", "top"))) + xlab(expression(bolditalic("Wigglesworthia"))) + ylab(expression(bolditalic("Spiroplasma")))
dev.off()

reg1b<-ggplot(spi, aes(Normalized_Tryp, Normalized_Spiro, col = Infection_type, fill = Infection_type)) + 
  geom_point(size = 3, shape = 21, col = "black") +
  geom_vline(xintercept = 2.1, color = "black", size=1)+
  xlab(expression(italic("Wigglesworthis")))
reg1b

tiff("reg1b.tiff", width = 4, height = 4, units = 'in', res = 300)
plot(reg1a+theme_tufte() + theme(axis.line = element_line(size = 1, colour = "black"))+ theme(legend.position = c(.95, .95),legend.justification = c("right", "top"))) + xlab(expression(bolditalic("Trypanosoma"))) + ylab(expression(bolditalic("Spiroplasma")))
dev.off()